import json

# Load the records. JSON text is UTF-8 by specification, so decode it
# explicitly instead of relying on the platform's locale-dependent default
# (which breaks on non-ASCII content on e.g. Windows).
with open("random_date_data.json", encoding="utf-8") as f:
    data = json.load(f)

# date_dict maps each record's 'date' value to the number of records with it.
date_dict = {}
# text holds one "<title> <content>" string per record, in input order.
text = []
for d in data:
    date_dict[d['date']] = date_dict.get(d['date'], 0) + 1
    # A missing 'title' or 'content' key falls back to the empty string.
    text.append(d.get('title', '') + ' ' + d.get('content', ''))



import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# One row per record; 'length' is the character count of the combined text.
df = pd.DataFrame({'string': text})
df['length'] = df['string'].apply(len)

# Bucket edges and their display labels. The labels describe right-closed
# ranges: 1-500, 501-1000, 1001-2000 and everything above 2000.
bins = [0, 500, 1000, 2000, np.inf]
labels = ['1-500', '501-1000', '1001-2000', '>2000']

# Assign each length to its bucket. right=True makes the intervals
# (0, 500], (500, 1000], (1000, 2000], (2000, inf] so that a length of exactly
# 500 / 1000 / 2000 lands in the bucket whose label names it; with
# right=False those boundary values were pushed into the next bucket.
# include_lowest=True keeps the left edge (0) inside the first bucket.
df['zone'] = pd.cut(df['length'], bins=bins, labels=labels, right=True, include_lowest=True)

# Count the strings per bucket, ordered by bucket rather than by frequency.
zone_counts = df['zone'].value_counts().sort_index()

# Print the per-bucket counts.
for zone, count in zone_counts.items():
    print(f"区间 {zone} 的字符串数量: {count}")
